import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the Baidu realtime hot-search board and export each entry's title
# and hot index to a CSV file in the current working directory.

# Request headers with a desktop-browser User-Agent string.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'}

# Fetch the page source.
url = 'https://top.baidu.com/board?tab=realtime'
# A timeout keeps the script from hanging forever on a stalled connection
# (requests applies no timeout by default).
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page and
# writing a misleading CSV.
response.raise_for_status()
# Force UTF-8 decoding of the body rather than relying on requests' guess.
response.encoding = 'utf-8'
html_code = response.text

# Parse the page with BeautifulSoup using the lxml parser.
soup = BeautifulSoup(html_code, 'lxml')

# Locate the elements holding the titles and the hot-index values by CSS class.
titles = soup.select('div.c-single-text-ellipsis')
indices = soup.select('div.hot-index_1Bl1a')

# Extract the stripped text of each (title, index) pair.
# NOTE: zip() stops at the shorter list, so if the two selectors match a
# different number of elements the surplus entries are silently dropped.
data = [
    [title.get_text().strip(), index.get_text().strip()]
    for title, index in zip(titles, indices)
]

# Assemble the rows into a DataFrame and export it. The 'utf-8-sig' encoding
# writes a BOM at the start of the file.
df = pd.DataFrame(data, columns=['热搜标题', '热搜指数'])
df.to_csv('百度热搜.csv', index=False, encoding='utf-8-sig')
